# DESCRIPTIVES ---------------------------------------------------------------

# Perceived purpose ---------------------------------------------------------------

# Purposes that keep their own category in the charts below: rank every
# end-of-survey purpose by `estimate`, look at the nine highest, and drop the
# free-text "Other (specify)" option. Anything not in this list is later
# lumped into "Other".
purposes_to_keep <- purpose %>%
  expand_select_multiple(purpose_2_label_list) %>%
  pivot_longer(
    matches("purpose_2_label_list_"),
    names_prefix = "purpose_2_label_list_"
  ) %>%
  group_by(name) %>%
  summarise(mean_cl_cluster(value, cluster = group_id)) %>%
  ungroup() %>%
  arrange(desc(estimate)) %>%
  filter(row_number() <= 9, name != "Other (specify)") %>%
  pull(name)

# Clustered mean (with interval) of each lumped purpose, by discussion arm,
# as elicited right after the outcome round. Only the control and full
# discussion arms are compared.
purpose_0_bar <- purpose %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  select(discuss_type, discuss_type_label, group_id, matches("purpose")) %>%
  expand_select_multiple(purpose_0_label_list) %>%
  pivot_longer(
    matches("purpose_0_label_list_"),
    names_prefix = "purpose_0_label_list_"
  ) %>%
  # Purposes outside `purposes_to_keep` are collapsed into "Other"
  mutate(name_lumped = replace(name, !(name %in% purposes_to_keep), "Other")) %>%
  group_by(name_lumped, discuss_type, discuss_type_label) %>%
  summarise(mean_cl_cluster(value, cluster = group_id)) %>%
  ungroup()

# Same summary for the purposes elicited at the end of the survey.
purpose_2_bar <- purpose %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  select(discuss_type, discuss_type_label, group_id, matches("purpose")) %>%
  expand_select_multiple(purpose_2_label_list) %>%
  pivot_longer(
    matches("purpose_2_label_list_"),
    names_prefix = "purpose_2_label_list_"
  ) %>%
  mutate(name_lumped = replace(name, !(name %in% purposes_to_keep), "Other")) %>%
  group_by(name_lumped, discuss_type, discuss_type_label) %>%
  summarise(mean_cl_cluster(value, cluster = group_id)) %>%
  ungroup()

# Dodged bar chart of perceived purposes, one facet per elicitation point.
bind_rows(
  "1. After outcome round" = purpose_0_bar,
  "2. End of survey" = purpose_2_bar,
  .id = "purpose_type"
) %>%

  # Order categories by estimate, then move "Other" to the first factor level
  mutate(name_lumped = fct_reorder(name_lumped, estimate) %>% fct_relevel("Other")) %>%

  ggplot(aes(x = name_lumped, y = estimate, fill = discuss_type_label)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.7) +
  # The error bars must use the same dodge width as the bars; a different
  # width shifts each whisker away from the centre of its bar.
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    colour = "#5A5A5A",
    width = 0,
    position = position_dodge(width = 0.7)
  ) +
  facet_wrap(~ purpose_type) +
  coord_flip() +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.position = "top") +
  scale_fill_brewer(palette = "Set2", direction = -1) +
  labs(y = "% of participants who cite this reason", fill = element_blank(), x = element_blank()) +
  scale_y_continuous(labels = scales::percent_format(), breaks = c(0, 0.25, 0.5))

# ggsave() with no `plot` argument saves the most recently created plot
ggsave("outputs/figs/purpose_barchart.pdf", width = 8, height = 4)


# F-test for whether the perceived purpose of the experiment predicts
# discussion type

# Expand both select-multiple purpose questions into one indicator column per
# option, then drop the raw list columns and the `__99` indicators.
df_purpose_expanded <- purpose %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  select(
    discussion_full, discuss_type, discuss_type_label, group_id,
    matches("purpose")
  ) %>%
  expand_select_multiple(purpose_0_list) %>%
  expand_select_multiple(purpose_2_list) %>%
  select(
    -purpose_0_list, -purpose_2_list,
    -purpose_0_list__99, -purpose_2_list__99
  )

# Names of the indicator columns for each elicitation point
purpose_0_vars <- str_subset(names(df_purpose_expanded), "purpose_0_list")
purpose_2_vars <- str_subset(names(df_purpose_expanded), "purpose_2_list")

# Regress the discussion indicator on all purpose indicators and jointly test
# the terms matching the given prefix.
f_purpose_0 <- feols_custom(
  as.formula(paste("discussion_full ~", paste(purpose_0_vars, collapse = " + "))),
  data = df_purpose_expanded,
  cluster = "group_id"
) %>%
  f_test("purpose_0_list")

f_purpose_2 <- feols_custom(
  as.formula(paste("discussion_full ~", paste(purpose_2_vars, collapse = " + "))),
  data = df_purpose_expanded,
  cluster = "group_id"
) %>%
  f_test("purpose_2_list")

write_stat(f_purpose_0$stat, "outputs/stats/f_purpose_0.tex", 1)
write_stat(f_purpose_0$p, "outputs/stats/f_purpose_0_p.tex", 3, p_value = TRUE)

# NOTE(review): this p-value is written with 2 digits while the one above uses
# 3 -- confirm the difference is intended.
write_stat(f_purpose_2$stat, "outputs/stats/f_purpose_2.tex", 1)
write_stat(f_purpose_2$p, "outputs/stats/f_purpose_2_p.tex", 2, p_value = TRUE)

# SDB ---------------------------------------------------------------

# Console checks: print the scores, plot the item correlations, print the
# wide item table.
sdb_score

sdb_long %>%
  make_wide(val_rev) %>%
  corr_plot()
sdb_long %>%
  make_wide(val_rev)

# Number of rows with a non-missing SDB score
n_sdb <- nrow(filter(sdb_score, !is.na(sdb_score)))


# Leadership scale ---------------------------------------------------------------

# Display labels for the leadership-scale items; "(-)" marks the `_REV` items.
# `var_i` is the item number parsed from the name and is used to report the
# loadings in item order. Defined once and reused by every step below.
ls_labels <- tibble(
  name = c("ls1", "ls2_REV", "ls3", "ls6_REV", "ls7", "ls8", "ls9"),
  label = c("Confident", "Quiet (-)", "Leader", "Shy (-)", "Talkative", "Admirable", "Inspiring")
) %>%
  mutate(var_i = str_replace_all(name, "ls|_REV", "") %>% as.integer)

# Item correlation plots, once on `val_rev` and once on `val_acqui`
ls %>% select(KEY, person, name, val_rev) %>%
  pivot_wider(names_from = name, values_from = val_rev) %>%
  select(-KEY, -person) %>%
  corr_plot()

ls %>% select(KEY, person, name, val_acqui) %>%
  pivot_wider(names_from = name, values_from = val_acqui) %>%
  select(-KEY, -person) %>%
  corr_plot()

# Pairwise correlation matrix of the labelled items
ls_corr_plot_input <- ls %>% select(KEY, person, name, val_acqui) %>%
  left_join(ls_labels, by = "name") %>%
  select(-name, -var_i) %>%
  pivot_wider(names_from = label, values_from = val_acqui) %>%
  select(-KEY, -person) %>%
  cor(use = "pairwise.complete.obs") %>%
  round(3)

# `COL2` is namespace-qualified like `corrplot()` itself, so this works
# whether or not corrplot is attached.
ls_corr_plot_input %>%
  corrplot::corrplot(
    method = "circle",
    type="lower",
    tl.col="black",
    col = corrplot::COL2(n = 10)
  )

# One-factor loadings of the labelled items, phase 2 only
ls_loadings <- ls %>%
  filter(phase == "phase_2") %>%
  select(KEY, person, name, val_acqui) %>%
  left_join(ls_labels, by = "name") %>%
  select(-name, -var_i) %>%
  pivot_wider(names_from = label, values_from = val_acqui) %>%
  select(-KEY, -person) %>%
  factor_loadings(n_factors = 1)


# Write the loadings as "(l1, l2, ...)" in item-number order
ls_loadings %>%
  left_join(ls_labels, by = c("var" = "label")) %>%
  arrange(var_i) %>%
  select(-name, -var_i) %>%
  pull(MR1) %>%
  round(2) %>%
  paste(collapse = ", ") %>%
  paste0("(", ., ")") %>%
  writeLines("outputs/stats/ls_loadings.tex")

# What is the correlation across different people's measures?

# 1. Basic correlation
# Number the ratings within each `ls_ind_id`, spread them into rater_1,
# rater_2, ... columns, and correlate the first two. `cor.test()` drops
# incomplete pairs itself and has no `use` argument, so none is passed.
ls_corr_basic <- ls_scores %>%
  select(ls_ind_id, ls_score_fact_z) %>%
  group_by(ls_ind_id) %>%
  mutate(rating = str_glue("rater_{row_number()}")) %>%
  pivot_wider(names_from = "rating", values_from = "ls_score_fact_z") %>%
  ungroup %>%
  select(-ls_ind_id, -rater_3) %>%
  {cor.test(.$rater_1, .$rater_2)}

ls_corr_basic$estimate %>% write_stat("outputs/stats/ls_corr_basic.tex", digits = 2)

# Both branches now end in "%" (matching the fixed-effects p-value below);
# the stray ")" previously emitted by the else-branch is removed.
ls_corr_basic$p.value %>%
  {
    if (. < 0.001) {
      "p$<$0.001"
    } else {
      paste0("p=", as.character(round(., 3)))
    }
  } %>%
  paste0(., "%") %>%
  writeLines("outputs/stats/ls_corr_basic_p.tex")

# 2. Residual after accounting for FEs of each individual
# Subtract each `ind_id`'s mean score before spreading and correlating as above.
ls_corr_fes <- ls_scores %>%
  group_by(ind_id) %>%
  mutate(rater_fe = mean_na(ls_score_fact_z)) %>%
  ungroup %>%
  mutate(resid = ls_score_fact_z - rater_fe) %>%
  arrange(ls_ind_id) %>%

  select(ls_ind_id, resid) %>%
  group_by(ls_ind_id) %>%
  mutate(rating = str_glue("rater_{row_number()}")) %>%
  pivot_wider(names_from = "rating", values_from = "resid") %>%
  ungroup %>%
  select(-ls_ind_id, -rater_3) %>%
  {cor.test(.$rater_1, .$rater_2)}


ls_corr_fes$estimate %>% write_stat("outputs/stats/ls_corr_fes.tex", digits = 2)

ls_corr_fes$p.value %>%
  {
    if (. < 0.001) {
      "p$<$0.001"
    } else {
      str_glue("p={as.character(round(., 3))}")
    }
  } %>%
  paste0(., "%") %>%
  writeLines("outputs/stats/ls_corr_fes_p.tex")

# Correlation between leadership scale and dominance in discussion

# Mean ls_score per `ind_id` (missing values handled by `mean_na`)
ls_scores_avg <- ls_scores %>%
  group_by(ind_id) %>%
  summarise(ls_score_fact_z = mean_na(ls_score_fact_z)) %>%
  ungroup()

# Phase-2 discussion observations with the averaged score attached.
# `ls_scores_avg` has one row per `ind_id` and no `phase` column, so filtering
# before the join gives the same rows as filtering after it.
ls_and_discuss <- discuss_obs_person %>%
  filter(phase == "phase_2") %>%
  left_join(ls_scores_avg, by = "ind_id")

# Salience  ---------------------------------------------------------------

# Two specifications of the salience check on choosing the trans option in
# the outcome round; they differ only in the salience regressor
# (above-median indicator vs. `prop_salience_minus_trans`).
# NOTE(review): both list entries carry the same name -- confirm that is the
# intended column header in the exported table.
salience_checks <- list(
  "Selected trans in outcome round" = feols_custom(
    r2_choose_trans ~ discuss_type_label + trans_remembered +
      above_median_prop_salience + video_type + factor(stratum_id),
    data = filter(r2_with_salience, !is.na(r2_choose_trans)),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id"
  ),
  "Selected trans in outcome round" = feols_custom(
    r2_choose_trans ~ discuss_type_label + trans_remembered +
      prop_salience_minus_trans + video_type + factor(stratum_id),
    data = filter(r2_with_salience, !is.na(r2_choose_trans)),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id"
  )
)

# Export both models, hiding stratum, intercept and video-type coefficients
tex_export(
  salience_checks,
  file = "outputs/tables/salience_checks.tex",
  coef_omit = "stratum_id|Intercept|video_type",
  coef_rename = coef_label,
  gof_map = fe_label
)

# Control-arm means of the two salience measures
salience %>%
  filter(discuss_type == "control") %>%
  pull(trans_remembered) %>%
  mean_na() %>%
  write_percentage("outputs/stats/salience_trans.tex")

salience %>%
  filter(discuss_type == "control") %>%
  pull(prop_salience_minus_trans) %>%
  mean_na() %>%
  write_percentage("outputs/stats/salience_non_trans.tex")


# List experiment ---------------------------------------------------------------

# Build the LaTeX table of list-experiment statements.
questions_phase_1 %>%
  # Keep questions named l<number>, except l4 and l11
  filter(str_detect(name, "^l\\d\\d?$")) %>%
  filter(!(name %in% c("l4", "l11"))) %>%
  # Items numbered up to 6 go to List A, the rest to List B
  mutate(i = str_extract(name, "\\d\\d?") %>% as.numeric()) %>%
  mutate(list = ifelse(i <= 6, "List A", "List B")) %>%
  # Number the statements within each list
  group_by(list) %>%
  mutate(`Question number` = row_number()) %>%
  select(-i) %>%
  select(-name, -type, -choices) %>%
  select(List = list, `\\#` = `Question number`, Question = label) %>%
  # Show the list label only on the first row of each list; the data are
  # still grouped by list here, so `lag()` is evaluated within each list
  mutate(List = ifelse(List == lag(List) & !is.na(lag(List)), "", List)) %>%

  xtable(caption = "List experiment statements", label = "tab_list_experiment_questions") %>%
  # `.` inside `nrow(.)` is the piped xtable object. The extra rule after
  # row 5 presumably separates List A from List B -- TODO confirm.
  print(file = "outputs/tables/list_experiment_questions.tex",
        include.rownames = FALSE, sanitize.text.function = function(x) x,
        hline.after = c(-1, 0, nrow(.), 5),
        scalebox=0.9,
        caption.placement = "top")

# LIST EXPERIMENT VALIDATION CHECK

# Three models of the list answer on whether the trans statement was in the
# list and its interaction with discussion: list A only (`list_b == 0`),
# list B only (`list_b == 1`), and both lists pooled. All three use the
# control and full-discussion arms and pass the same options to
# `feols_custom`; the first model previously ended in a dangling comma and
# lacked the `warn`/`notes` settings of its siblings.
list_exp_validation <- list(
  "Trans statement in list A" = feols_custom(
    list_answer ~ trans_in_list_group + trans_in_list_group_group_labeldiscuss + video_type + factor(list_order_first) + trans_in_list_group,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("trans_in_list_group_group_labeldiscuss"),
      interact = NULL,
      group_control = FALSE
    ),
    data = list_exp %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(list_b == 0),
    warn = FALSE, notes = FALSE
  ),
  "Trans statement in list B" = feols_custom(
    list_answer ~ trans_in_list_group + trans_in_list_group_group_labeldiscuss + video_type + factor(list_order_first) + trans_in_list_group,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("trans_in_list_group_group_labeldiscuss"),
      interact = NULL,
      group_control = FALSE
    ),
    data = list_exp %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(list_b == 1),
    warn = FALSE, notes = FALSE
  ),
  "Pooled" = feols_custom(
    list_answer ~ trans_in_list_group + trans_in_list_group_group_labeldiscuss + video_type + factor(list_order_first) + list_b + trans_in_list_group,
    fixef = c("ind_id", "stratum_id", "video_type", "list_b", "phase"),
    cluster = ~ ind_id,
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("trans_in_list_group_group_labeldiscuss"),
      interact = NULL,
      group_control = FALSE
    ),
    data = list_exp %>% filter(discuss_type %in% c("control", "discussion_full")),
    warn = FALSE, notes = FALSE
  )
)

# Append each model's number of observations to its name (used as the legend)
names(list_exp_validation) <- paste0(names(list_exp_validation), " (Obs=",
                                     list_exp_validation %>% map_dbl(~ nrow(.x$data)), ")")

# Coefficient plot of the two terms of interest across the three models
list_exp_validation %>%
  modelplot(coef_omit = vars_to_regex("stratum|video|factor\\(list_order_first\\)|list_b", control_vars),
            coef_rename = c("trans_in_list_group_group_labeldiscuss" = "Anti-trans statement in list x 3-person discussion",
                            "trans_in_list_group" = "Anti-trans statement in list")) +
  geom_vline(xintercept = 0, colour = "darkgrey") +

  # One colour per model
  scale_colour_manual(values = c("indianred", "skyblue", "darkgrey")) +
  coord_cartesian(xlim = c(-0.2, 0.6))

ggsave("outputs/figs/list_exp_validation.pdf", width = 8, height = 4, scale = 1)


# Interaction coefficient from each single-list model.
# The list names were suffixed with " (Obs=...)" above, so these `$` lookups
# rely on partial name matching; keep `$` here (`[[` matches exactly).
list_exp_validation$`Trans statement in list A` %>%
  tidy() %>%
  filter(term == "trans_in_list_group_group_labeldiscuss") %>%
  pull(estimate) %>%
  write_stat("outputs/stats/list_exp_coeff_list_a.tex", digits = 3)

list_exp_validation$`Trans statement in list B` %>%
  tidy() %>%
  filter(term == "trans_in_list_group_group_labeldiscuss") %>%
  pull(estimate) %>%
  write_stat("outputs/stats/list_exp_coeff_list_b.tex", digits = 3)

# P-value of the difference between lists: the coefficient on the
# interaction-term x list_b product in a pooled model
feols_custom(
  list_answer ~ trans_in_list_group + trans_in_list_group_group_labeldiscuss * list_b + trans_in_list_group:video_type + factor(list_order_first) + factor(stratum_id)*trans_in_list_group,
  fixef = c("stratum_id"),
  cluster = ~ group_id,
  data = list_exp %>% filter(discuss_type %in% c("control", "discussion_full")),
  warn = FALSE, notes = FALSE
) %>%
  tidy() %>%
  filter(term == "trans_in_list_group_group_labeldiscuss:list_b") %>%
  pull(p.value) %>%
  write_stat("outputs/stats/list_exp_p_diff.tex", digits = 2)

# Console check of the answer distribution by list and treatment
list_exp %>%
  count_prop(list_b, trans_in_list_group, list_answer)

# Trans recognition ---------------------------------------------------------------


# One row per respondent x photo round, with the photo's true status
# (`is_trans`) and whether the respondent listed that round in their
# space-separated `trans_recog` answer (`photo_guessed`).
trans_recog <- supp %>%
  tidylog::filter(survey_completed == 1) %>%
  select(trans_recog, matches("trans_yn_"), matches("hiring_photo_final_")) %>%
  # Drop the special answer codes and missing answers
  tidylog::filter(!(trans_recog %in% c("-99", "-98", "-97"))) %>%
  tidylog::filter(!is.na(trans_recog)) %>%
  # Split "<variable>_<round>" column names into variable columns plus `round`
  pivot_longer(
    matches("trans_yn_|hiring_photo_final_"),
    names_to = c(".value", "round"),
    names_pattern = "(.*)_(\\d\\d?$)"
  ) %>%
  rename(is_trans = trans_yn, photo_i = round) %>%
  mutate(
    trans_recog_list = str_split(trans_recog, " "),
    # Row by row: is this round among the rounds the respondent listed?
    photo_guessed = map2_lgl(photo_i, trans_recog_list, ~ .x %in% .y),
    is_trans = as.logical(is_trans)
  )

# CONFUSION MATRIX
library(magrittr)

# Console check: each guess x truth cell as a share of all rows.
# (The earlier xtable write to trans_recog_matrix.tex was removed: the kable
# table at the end of this block writes the same file and overwrote it.)
trans_recog %$%
  table(photo_guessed, is_trans) %>%
{. / nrow(trans_recog)} %>%
  round(2) %>%
  print

# Cross-tabulate guesses against the truth. Both variables are pinned to the
# levels FALSE/TRUE so the table is always 2x2, even if one outcome never
# occurs; the fixed 3x3 labels below would otherwise fail.
cross_tab <- table(
  factor(trans_recog$photo_guessed, levels = c(FALSE, TRUE)),
  factor(trans_recog$is_trans, levels = c(FALSE, TRUE))
)

# Add row and column totals
cross_tab_matrix <- addmargins(as.matrix(cross_tab))

names(dimnames(cross_tab_matrix)) <- c('Participant Guess','Photo')
rownames(cross_tab_matrix) <- c("Male or female", "Transgender", "Total")
colnames(cross_tab_matrix) <- c("Male or female", "Transgender", "Total")

print(cross_tab_matrix)

# Create the LaTeX table using kable, then post-process the generated text:
# label the header row, strip the table float, the significance-star footnote
# and the caption, and write the result to disk.
latex_table <- kable(cross_tab_matrix, format = "latex",
                     # file = "outputs/tables/trans_recog_matrix.tex",
                     booktabs = TRUE, caption = "Cross-Tabulation of Participant Guesses and Photos") %>%
  kableExtra::add_header_above(c(" " = 1, "Photo" = 3)) %>%
  kableExtra::kable_styling() %>%
  str_replace_all(
    "& Male or female & Transgender & Total\\\\",
    "\\\\textit{Participant guess} & Male or female & Transgender & Total\\\\"
  ) %>%
  str_replace_all("Photo", "\\\\textit{Correct gender:}") %>%
  str_replace_all("\\\\begin\\{table\\}\n", "") %>%
  str_replace_all("\\\\end\\{table\\}", "") %>%
  str_replace_all(fixed("\\multicolumn{2}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all(fixed("\\multicolumn{3}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all(fixed("\\multicolumn{4}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all(fixed("\\multicolumn{5}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all(fixed("\\multicolumn{6}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all(fixed("\\multicolumn{7}{l}{\\rule{0pt}{1em}* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\\\"), "") %>%
  str_replace_all("\\\\caption\\{.*\\}", "") %>%
  writeLines("outputs/tables/trans_recog_matrix.tex")


# Console check: guesses among photos that are trans
trans_recog %>%
  filter(is_trans) %>%
  count_prop(photo_guessed)

# Share of trans photos that were guessed
trans_recog %>%
  filter(is_trans) %>%
  pull(photo_guessed) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_trans_recog.tex")

# Share of non-trans photos that were guessed (false positives)
trans_recog %>%
  filter(!is_trans) %>%
  pull(photo_guessed) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_non_trans_false_pos.tex", digits = 1)


# Gap between end of treatment round and outcome round ---------------------------------------------------------------

# Plot map with survey locations ---------------------------------------------------------------

# Sample location ---------------------------------------------------------------

# Clean the column names (janitor::clean_names) for the location data
df_clean_names <- df %>% janitor::clean_names()

# Midpoint of the GPS bounding box, for centring a map. Missing coordinates
# are ignored; without `na.rm` a single NA would make both midpoints NA.
lat_mid <- mean(range(df_clean_names$gps_latitude, na.rm = TRUE))
lon_mid <- mean(range(df_clean_names$gps_longitude, na.rm = TRUE))

# Set to TRUE to re-download the base map and regenerate the figure
rerun_map <- FALSE

if (rerun_map) {

  # Read the Google Maps API key from the environment instead of pasting it
  # into the script. If the variable is unset, no key is registered here and
  # any previously registered key is left in place.
  google_key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
  if (nzchar(google_key)) {
    register_google(key = google_key)
  }

  chennai_map_4 <- get_map(source = "stadia", location = "Chennai", zoom = 12, maptype = "toner-lite")

  # One semi-transparent point per survey location
  ggmap(chennai_map_4) +
    geom_point(data = df_clean_names %>% mutate(phase = str_replace(phase, "phase_", "Phase ")),
               aes(x = gps_longitude, y = gps_latitude), size = 2, alpha = 0.3) +
    labs(color = element_blank(), x = "Longitude", y = "Latitude")


  ggsave("outputs/figs/survey_locations.pdf", width = 8, height = 6)

}


# Sample descriptives ---------------------------------------------------------------

# Total sample size
total_n <- nrow(df)

write_stat(total_n, "outputs/stats/total_n.tex", digits = 0)

# Arm sizes within phase 2
df %>%
  filter(phase == "phase_2") %>%
  count_prop(discuss_type)

# Arm sizes across all phases
df %>%
  # filter(phase == "phase_1") %>%
  count_prop(discuss_type)

# Video-type split
df %>%
  count_prop(video_type)

# Did neighbours know each other?
# Share of groups whose relation list does NOT contain the code "0".
group_relations$group_member_relation_list %>%
  map_lgl(~ "0" %in% .x) %>%
  mean_na() %>%
  `-`(1, .) %>%
  write_percentage("outputs/stats/prop_neighbours_knew_each_other.tex")

# How well did they know each other?
group_relations %>%
  summarise(mean_na(close_family | other_family | friend)) %>%
  pull(1) %>%
  write_percentage("outputs/stats/prop_neighbours_relations.tex")

group_relations %>%
  summarise(mean_na(neighbour)) %>%
  pull(1) %>%
  write_percentage("outputs/stats/prop_neighbours_neighbour.tex")

# Share of respondents with `target_f == "F"`
df %>%
  summarise(mean_na(target_f == "F")) %>%
  pull(1) %>%
  write_percentage("outputs/stats/prop_female.tex")


# Console check: delivery receipt by gender
df %>%
  group_by(target_f) %>%
  count_prop(target_f, normally_receives_delivery)

# Proportion of females who normally receive delivery
df %>%
  filter(target_f == "F") %>%
  pull(normally_receives_delivery) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_receive_delivery_female.tex")

# Proportion of males who normally receive delivery
df %>%
  filter(target_f == "M") %>%
  pull(normally_receives_delivery) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_receive_delivery_male.tex")


# How often did people in the choosing-only arm see the other options?
# (`count_prop` is used mid-pipe as a console check; the mean is then taken
# from the piped data.)
discuss_obs_choosing_only %>%
  count_prop(participants_saw) %>%
  pull(participants_saw) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_choosing_only_saw.tex", digits = 1)

# ... and how often did they speak about an option?
discuss_obs_choosing_only %>%
  count_prop(participants_spoke, participants_spoke_about_option) %>%
  pull(participants_spoke_about_option) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_choosing_only_spoke.tex", digits = 1)


# Share of interviews where others could NOT hear the answers
df %>%
  pull(others_could_hear_answers) %>%
  mean_na() %>%
  `-`(1, .) %>%
  write_percentage("outputs/stats/prop_others_could_hear_answers.tex", digits = 0)

# Share of interviews where others COULD hear the answers
df %>%
  pull(others_could_hear_answers) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_others_could_hear_answers_rev.tex", digits = 0)

# Share of listeners who did not speak about the options
listener_obs %>%
  pull(listener_spoke_about_options) %>%
  mean_na() %>%
  `-`(1, .) %>%
  write_percentage("outputs/stats/prop_listener_silent.tex", digits = 0)

# Within-person correlation between the first and second non-missing
# `r2_choose_trans` choices (ordered by round).
cor_trans_12 <- r2_choices_num %>%
  select(ind_id, round, r2_choose_trans) %>%
  select(-matches("\\[")) %>%
  filter(!is.na(r2_choose_trans)) %>%
  group_by(ind_id) %>%
  arrange(ind_id, round) %>%
  # Re-index each person's remaining rounds as 1, 2, ...
  mutate(trans_round = row_number()) %>%
  select(-round) %>%
  pivot_wider(
    names_from = trans_round,
    names_prefix = "trans_",
    values_from = r2_choose_trans
  ) %>%
  ungroup() %>%
  select(trans_1, trans_2) %>%
  with(cor.test(trans_1, trans_2))

# Point estimate and confidence-interval bounds
write_stat(cor_trans_12$estimate, "outputs/stats/cor_trans_12.tex")
write_stat(cor_trans_12$conf.int[[1]], "outputs/stats/cor_trans_12_low.tex")
write_stat(cor_trans_12$conf.int[[2]], "outputs/stats/cor_trans_12_high.tex")


# Audio consent among `group_role == 1` respondents in the two discussion arms.
# Note: prop_audio_consent.tex holds the mean of `audio_refused`; its
# complement is written to prop_audio_consent_yes.tex.
df %>%
  filter(
    group_role == 1,
    discuss_type %in% c("discussion_full", "discussion_pair")
  ) %>%
  pull(audio_refused) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_audio_consent.tex")

df %>%
  filter(
    group_role == 1,
    discuss_type %in% c("discussion_full", "discussion_pair")
  ) %>%
  pull(audio_refused) %>%
  mean_na() %>%
  `-`(1, .) %>%
  write_percentage("outputs/stats/prop_audio_consent_yes.tex")

# Anonymous round checks: share answering 1 on each check
df %>%
  count_prop(an_check1) %>%
  mutate(an_check1 = an_check1 == 1) %>%
  get_mean(an_check1) %>%
  write_percentage("outputs/stats/prop_an_check1.tex", digits = 1)

df %>%
  count_prop(an_check2) %>%
  mutate(an_check2 = an_check2 == 1) %>%
  get_mean(an_check2) %>%
  write_percentage("outputs/stats/prop_an_check2.tex", digits = 1)

# Follow up - field discussion ---------------------------------------------------------------

# `discuss3` by arm, with the code -98 treated as missing
df_follow_up %>%
  count_prop(discuss3) %>%
  mutate(discuss3 = replace_with_na(discuss3, -98)) %>%
  filter(!is.na(discuss_type)) %>%
  bar_chart(x = discuss_type, y = discuss3)


# Control-arm mean of `discuss3`
df_follow_up %>%
  filter(discuss_type == "control") %>%
  mutate(discuss3 = replace_with_na(discuss3, -98)) %>%
  get_mean(discuss3) %>%
  write_percentage("outputs/stats/prop_conv_trans.tex", digits = 0)


# `d3` by arm, among rows with d3 > 0
df_follow_up %>%
  count_prop(d3) %>%
  filter(d3 > 0) %>%
  filter(!is.na(discuss_type)) %>%
  bar_chart(x = discuss_type, y = d3)

# Purple scratchcard ---------------------------------------------------------------

# Share of respondents whose scratchcard colour is "purple"
df %>%
  select(matches("scratchcard|sc_")) %>%
  count_prop(sc_colour) %>%
  mutate(purple = sc_colour == "purple") %>%
  get_mean(purple) %>%
  write_percentage("outputs/stats/prop_purple.tex", digits = 1)

# Bar charts ---------------------------------------------------------------

# Control-arm share choosing the trans option, split by one respondent
# characteristic per figure. Every figure uses the same 0-50% axis.

# By gender
r2_choices_num %>%
  filter(discuss_type == "control") %>%
  mutate(female = ifelse(female == 1, "Female", "Male")) %>%
  bar_chart(x = female, y = r2_choose_trans, fill = female, percent = TRUE) +
  scale_fill_brewer(palette = "Set1") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.5)) +
  labs(x = element_blank(), y = "% chose transgender") +
  theme(legend.position = "none")

ggsave("outputs/figs/d_stats_female.pdf", width = 2, height = 3)

# By religion
r2_choices_num %>%
  filter(discuss_type == "control") %>%
  mutate(hindu = ifelse(hindu == 1, "Hindu", "Not-Hindu")) %>%
  bar_chart(x = hindu, y = r2_choose_trans, fill = hindu, percent = TRUE) +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.5)) +
  labs(x = element_blank(), y = "% chose transgender") +
  theme(legend.position = "none")

ggsave("outputs/figs/d_stats_hindu.pdf", width = 2, height = 3)

# By marital status
r2_choices_num %>%
  filter(discuss_type == "control") %>%
  mutate(married = ifelse(married == 1, "Married", "Not Married")) %>%
  bar_chart(x = married, y = r2_choose_trans, fill = married, percent = TRUE) +
  scale_fill_brewer(palette = "Set3") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.5)) +
  labs(x = element_blank(), y = "% chose transgender") +
  theme(legend.position = "none")

ggsave("outputs/figs/d_stats_married.pdf", width = 2, height = 3)

# By household size relative to the median
r2_choices_num %>%
  filter(discuss_type == "control") %>%
  mutate(hh_size_above_med = ifelse(hh_size_above_med, "Above med\nhh size", "Below med.\nhh size")) %>%
  bar_chart(x = hh_size_above_med, y = r2_choose_trans, fill = hh_size_above_med, percent = TRUE) +
  scale_fill_brewer(palette = "Accent") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.5)) +
  labs(x = element_blank(), y = "% chose transgender") +
  theme(legend.position = "none")

ggsave("outputs/figs/d_stats_hh_size.pdf", width = 2, height = 3.3)

# Proportion of control groups with at least one member who chose the trans
# option twice.
prop_groups_pro_trans_member <- r2_choices_num %>%
  filter(discuss_type == "control") %>%
  # Number of trans choices per person ...
  group_by(group_id, ind_id) %>%
  summarise(n_trans = sum_na(r2_choose_trans)) %>%
  # ... then the number of members per group with exactly two
  group_by(group_id) %>%
  summarise(n_people_chose_trans_twice = sum_na(n_trans == 2)) %>%
  ungroup() %>%
  count_prop(n_people_chose_trans_twice > 0, return_count = TRUE)

# The second row of the count table is taken as the "at least one" share
# NOTE(review): this assumes the TRUE row comes second -- confirm against
# `count_prop`'s output order.
write_percentage(
  prop_groups_pro_trans_member$prop[[2]],
  "outputs/stats/prop_groups_with_activist.tex"
)
